/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.nutch.net;

import org.apache.nutch.plugin.Extension;
import org.apache.nutch.plugin.ExtensionPoint;
import org.apache.nutch.plugin.PluginRepository;

import org.apache.hadoop.conf.Configuration;

import org.apache.nutch.util.NutchConfiguration;

import java.io.BufferedReader;
import java.io.InputStreamReader;

/**
 * Command-line tool that checks one given normalizer or all normalizers.
 *
 * <p>
 * Every line read from standard input is passed to the selected
 * normalizer(s) together with the chosen scope, and the result is printed
 * to standard output, one result per input line.
 * </p>
 */
public class URLNormalizerChecker {

	/** Configuration used to look up plugins and to build the normalizers. */
	private final Configuration conf;

	/**
	 * Creates a checker that uses the given configuration.
	 *
	 * @param conf
	 *            configuration handed to the plugin repository and to
	 *            {@link URLNormalizers}
	 */
	public URLNormalizerChecker(Configuration conf) {
		this.conf = conf;
	}

	/**
	 * Runs a single normalizer, selected by its class name, over every line
	 * of standard input and prints each result.
	 *
	 * @param normalizerName
	 *            fully qualified class name of the normalizer to check; it is
	 *            compared against the class name of each extension instance
	 * @param scope
	 *            scope passed to the normalizer for every line
	 * @throws RuntimeException
	 *             if the URLNormalizer extension point is not registered or no
	 *             extension instance has the requested class name
	 * @throws Exception
	 *             if instantiating an extension, reading standard input or
	 *             normalizing a line fails
	 */
	private void checkOne(String normalizerName, String scope)
			throws Exception {
		ExtensionPoint point = PluginRepository.get(conf)
				.getExtensionPoint(URLNormalizer.X_POINT_ID);

		if (point == null) {
			throw new RuntimeException(
					URLNormalizer.X_POINT_ID + " not found.");
		}

		// Instantiate extensions in order and stop at the first one whose
		// implementation class matches the requested name.
		URLNormalizer normalizer = null;
		for (Extension extension : point.getExtensions()) {
			URLNormalizer candidate = (URLNormalizer) extension
					.getExtensionInstance();
			if (candidate.getClass().getName().equals(normalizerName)) {
				normalizer = candidate;
				break;
			}
		}

		if (normalizer == null) {
			throw new RuntimeException(
					"URLNormalizer " + normalizerName + " not found.");
		}

		System.out.println("Checking URLNormalizer " + normalizerName);

		BufferedReader in = new BufferedReader(
				new InputStreamReader(System.in));
		String line;
		while ((line = in.readLine()) != null) {
			String out = normalizer.normalize(line, scope);
			System.out.println(out);
		}
	}

	/**
	 * Runs the {@link URLNormalizers} instance built for the given scope over
	 * every line of standard input and prints each result.
	 *
	 * @param scope
	 *            scope used both to construct the {@link URLNormalizers} and
	 *            for every call to its normalize method
	 * @throws Exception
	 *             if reading standard input or normalizing a line fails
	 */
	private void checkAll(String scope) throws Exception {
		System.out.println(
				"Checking combination of all URLNormalizers available");

		BufferedReader in = new BufferedReader(
				new InputStreamReader(System.in));
		String line;
		URLNormalizers normalizers = new URLNormalizers(conf, scope);
		while ((line = in.readLine()) != null) {
			String out = normalizers.normalize(line, scope);
			System.out.println(out);
		}
	}

	/**
	 * Command-line entry point.
	 *
	 * <p>
	 * Accepts the optional arguments <code>-normalizer &lt;name&gt;</code>
	 * and <code>-scope &lt;scope&gt;</code>. Without <code>-normalizer</code>
	 * all normalizers are checked in combination; without
	 * <code>-scope</code> the scope is {@link URLNormalizers#SCOPE_DEFAULT}.
	 * An unknown argument, or an option that is not followed by a value,
	 * prints the usage message to standard error and exits with status -1.
	 * On success the JVM exits with status 0.
	 * </p>
	 *
	 * @param args
	 *            command-line arguments
	 * @throws Exception
	 *             if the check itself fails
	 */
	public static void main(String[] args) throws Exception {

		String usage = "Usage: URLNormalizerChecker [-normalizer <normalizerName>] [-scope <scope>]"
				+ "\n\tscope can be one of: default,partition,generate_host_count,fetcher,crawldb,linkdb,inject,outlink";

		String normalizerName = null;
		String scope = URLNormalizers.SCOPE_DEFAULT;
		for (int i = 0; i < args.length; i++) {
			// Both options take a value; without one the option is invalid
			// and must not index past the end of args.
			boolean hasValue = i + 1 < args.length;
			if (hasValue && args[i].equals("-normalizer")) {
				normalizerName = args[++i];
			} else if (hasValue && args[i].equals("-scope")) {
				scope = args[++i];
			} else {
				// Unknown argument, or an option missing its value.
				System.err.println(usage);
				System.exit(-1);
			}
		}

		URLNormalizerChecker checker = new URLNormalizerChecker(
				NutchConfiguration.create());
		if (normalizerName != null) {
			checker.checkOne(normalizerName, scope);
		} else {
			checker.checkAll(scope);
		}

		System.exit(0);
	}
}
